#!/usr/bin/env python
# -*- coding:utf-8 -*-

# file:do_exam1.py
# author:王雯雯
# datetime:2024-12-7
# software: PyCharm
import re


# Matches "http://" or "https://" followed by one or more URL characters.
# NOTE: inside the character class, ``$-_`` is a *range* (ASCII 0x24-0x5F), so
# besides letters, digits and "%XX" escapes it also accepts '/', ':', '?', '=',
# ';', '<', '>', '[', ']', "'" and a literal backslash.  '#', '~', '"' and
# whitespace are not accepted, so a match ends at the first of those.
url_pattern = re.compile(r'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\\(\\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+')

input_file_path = "webspiderUrl.txt"  # path of the raw input file
output_file_path = "output.txt"  # path of the file the extracted URLs are saved to


def extract_urls(text):
    """Return all URLs found in *text*.

    Args:
        text: The string to scan (typically one line of the input file).

    Returns:
        A list of the matched URL strings in order of appearance; empty when
        *text* contains no match.
    """
    # The pattern has no capturing groups, so findall yields whole matches.
    return url_pattern.findall(text)


def extract_urls_to_file(source_path, target_path):
    """Write every URL found in *source_path* to *target_path*, one per line.

    The source file is read line by line as UTF-8, so it is never loaded into
    memory as a whole.  The target file is created, or truncated if it already
    exists, and written as UTF-8.

    Args:
        source_path: Path of the text file to scan.
        target_path: Path of the file that receives the URLs.

    Raises:
        OSError: If either file cannot be opened (for example, the source
            file does not exist).
    """
    with open(source_path, 'r', encoding='utf-8') as input_file, \
            open(target_path, 'w', encoding='utf-8') as output_file:
        for line in input_file:
            # Emit all URLs of the current line in one batched call.
            output_file.writelines(url + "\n" for url in extract_urls(line))


# Run the extraction only when executed as a script, so that importing this
# module does not touch the file system.
if __name__ == "__main__":
    extract_urls_to_file(input_file_path, output_file_path)
